*************************************************************************************************
*																								*
*						Flexible Wages, Bargaining, and The Gender Gap							*
*						Barbara Biasi and Heather Sarsons										*
*						Paper Figures															*
*																								*
*************************************************************************************************

* Session setup: start from empty memory and set the matrix/variable limits.
clear all
set more off
set matsize 11000
set maxvar 32000


* Machine selector: 1 = Heather, 2 = Barbara. Any other value aborts below.
global user = 2

* Project root for the selected machine; every path global is built from it.
if $user == 1 {
	local root "/Users/sarsons/Dropbox/wisconsin_women"
}
else if $user == 2 {
	local root "~/Dropbox/Research/wisconsin_women"
}
else {
	* Fail here rather than later with empty path globals.
	display as error "global user must be 1 (Heather) or 2 (Barbara); got: $user"
	exit 198
}

* Working directory (data) and the output/code locations used by the script.
cd "`root'/data"
global tab = "`root'/draft/tables"
global out = "`root'/draft/tex/QJE_resubmission/QJE_files"
global do = "`root'/do"
global RA = "`root'/do/calvin"

* Yale colors (RGB triplets) used in the confidence-interval options below
global yaleblue "0 53 107"
global ylb      "40 109 192"
global yo       "189 83 25"
global ylight   "217 233 242"

* Shared graph options: dotted grey grid lines and no plot-region outline.
* graph applies to both axes, graphbar to the y axis only.
global graph    "xlabel(,grid glp(dot) glc(gs10) tlc(gs10)) ylabel(,grid glp(dot) glc(gs10) tlc(gs10)) plotregion(lp(blank))"
global graphbar "ylabel(,grid glp(dot) glc(gs10) tlc(gs10)) plotregion(lp(blank))"

* Run the project preamble, then build the helper variables used further down.
do $do/QJE_R1/preamble_revision.do

* Log salary rescaled by 100.
replace logsalary = logsalary * 100

* Control specifications stored as globals (not referenced again in this section).
global itt = "i.dist_itt i.dist_itt#i.postexp i.totalexp i.totalexp#i.postexp i.master i.master#i.postexp i.phd i.phd#i.postexp i.high i.high#i.postexp i.math i.math#i.postexp i.year i.year##i.expire"
global expD = "i.district_code i.district_code#i.postexp i.district_code#i.totalexp i.district_code#i.totalexp#i.postexp i.district_code#i.master i.district_code#i.master#i.postexp i.district_code#i.phd i.district_code#i.phd#i.postexp i.high i.high#i.postexp i.math i.math#i.postexp i.year i.year##i.expire"

* Calendar time relative to the extension year.
g reltime = year - extension

* balanced3 = 1 for ids observed at both reltime -3 and reltime +3.
tempvar seen_pre3 seen_post3
bys id: egen `seen_pre3' = max(reltime == -3)
by id: egen `seen_post3' = max(reltime == 3)
g balanced3 = (`seen_pre3' == 1 & `seen_post3' == 1)
drop `seen_pre3' `seen_post3'

* dist_itt = the id's district_code in 2010, copied to all of that id's rows.
tempvar d2010
g `d2010' = district_code if year == 2010
bys id: egen dist_itt = max(`d2010')
drop `d2010'

* entry_pre = 1 for ids that ever have entry == 1 with totalexp == 1 in 2011 or earlier.
tempvar early_entrant
g `early_entrant' = entry == 1 & totalexp == 1 & year <= 2011
bysort id: egen entry_pre = max(`early_entrant')

* Drop event times of -6 and earlier; label the reference regressor for the plots.
keep if timex > -6
label var zero "0"

*-------------------------> 		Main file		<-------------------------*


*--> Figure 2: Timing of CBA expiration and extension
* Bar chart of the share of rows falling in each expiration year and each extension year.
preserve
	* Row counts within each expiration year and within each extension year
	gen t = 1
	bysort expire: egen Exp = sum(t)
	bysort extension: egen Ext = sum(t)

	* Reuse year as the bar category: the first six rows carry 2011-2016
	drop year
	gen year = 2010 + _n if _n < 7

	* Copy each group count onto the row that carries the matching year
	gen EXP = .
	gen EXT = .
	foreach yr of numlist 2011/2013 {
		qui sum Exp if expire == `yr'
		replace EXP = `r(mean)' if year == `yr'
	}
	foreach yr of numlist 2011/2014 2016 {
		qui sum Ext if extension == `yr'
		replace EXT = `r(mean)' if year == `yr'
	}

	* Turn counts into shares of all rows
	gen N = _N
	foreach v of varlist EXP EXT {
		replace `v' = `v' / N
	}

	graph bar EXP EXT, over(year) ///
		bar(1, color(black % 70)) bar(2, color(gs8 % 70)) ///
		ytitle("share teachers") $graphbar ///
		legend(order(1 "by CBA expiration" 2 "by CBA extension") region(lwidth(none)) pos(6) row(1))
	graph export "$out/CBA_expiration_repl.tif", replace height(600)
restore



*--> Figure 3: Unconditional salaries
* Panel (a): All districts
* Mean salary by gender and event time, with a band around the women's series.
preserve
	keep if abs(timex) <= 5
	replace salary_n = salary_n / 1000

	* One dummy per event-time value, plus the existing female-by-event-time terms
	qui tab timex, gen(T)
	local nT = r(r)
	reg salary_n T1-T`nT' femx_5-femx_15, cluster(district_code)
	local df = `e(df_r)'

	* Band half-width: t critical value times the SE of the matching femx term.
	* Dummy T1 pairs with femx_5, T2 with femx_6, and so on.
	* NOTE(review): the degrees of freedom are e(df_r) minus one - confirm this is intended.
	gen sd = .
	forvalues t = 1/`nT' {
		local c = `t' + 4
		replace sd = invttail(`df'-1,0.025)*(_se[femx_`c']) if T`t' == 1
	}

	* Cell means by gender and event time; the band is drawn for women only
	collapse salary_n sd, by(female timex)
	gen up = salary_n + sd if female == 1
	gen down = salary_n - sd if female == 1

	twoway	(rarea up down timex if female == 1, color(black%30) lp(dash) lcolor(white)) ///
			(connected salary_n timex if female == 1, color(black) lp(dash)) ///
			(connected salary_n timex if female == 0, color(gs8) lp(solid)) ///
			, legend(order(2 "women" 3 "men") region(lwidth(none)) pos(6) row(1)) $graph ///
			xtitle("time to CBA expiration/extension") ytitle("salary ($1,000)") ///
			xlabel(-5(1)5) ylabel(48(2)58) xline(0.5) xsize(6)
	graph export "$out/salary_raw_time_repl.tif", replace height(600)
restore		

* Panel (b):Extension in 2011
* Mean salary by gender and calendar year for rows with extension == 2011.
preserve
	drop femx_* fem_*

	* Year dummies T1..Tk and their interactions with female
	qui tab year, gen(T)
	local nY = r(r)
	forvalues t = 1/`nY' {
		gen fem_`t' = female * T`t'
	}

	replace salary_n = salary_n / 1000
	reg salary_n T1-T`nY' fem_* if extension == 2011, cluster(district_code)
	local df = `e(df_r)'

	* Band half-width: t critical value times the SE of that year's female interaction
	gen sd = .
	forvalues t = 1/`nY' {
		replace sd = invttail(`df'-1,0.025)*(_se[fem_`t']) if T`t' == 1
	}

	* Keep the regression sample, then take cell means by gender and year
	keep if extension == 2011
	collapse salary_n sd, by(female year)
	gen up = salary_n + sd if female == 1
	gen down = salary_n - sd if female == 1

	twoway 	(rarea up down year if female == 1, color(black%30) lp(dash) lcolor(white)) ///
			(connected salary_n year if female == 1, color(black) lp(dash)) ///
			(connected salary_n year if female == 0, color(gs8) lp(solid)) ///
			if year > 2007, legend(order(2 "women" 3 "men") pos(6) row(1) region(lwidth(none))) $graph ///
			xtitle("year") ytitle("salary ($1,000)") ///
			xlabel(2008(1)2016) xline(2011.5)  ylabel(48(2)58) xsize(6)
	graph export "$out/salary_raw_year_ext2011.tif", replace height(600)
restore	

* Panel (c):Extension in 2012
* Mean salary by gender and calendar year for rows with extension == 2012,
* with a band around the women's series.
preserve
drop femx_* fem_*
* Year dummies T1..Tk and their interactions with female
qui tab year, gen(T)
forvalues t = 1/`r(r)' {
gen fem_`t' = female * T`t'
}
replace salary_n = salary_n / 1000
reg salary_n T1-T`r(r)' fem_* if extension == 2012, cluster(district_code)
local df = `e(df_r)'
* Band half-width: t critical value times the SE of that year's female interaction
gen sd = .
qui tab year
forvalues t = 1/`r(r)' {
replace sd = invttail(`df'-1,0.025)*(_se[fem_`t']) if T`t' == 1
}
* Restrict to the regression sample. Stata variable names are case sensitive,
* so this must be the same lower-case extension variable used in the regression.
keep if extension == 2012
collapse salary_n sd, by(female year)
gen up = salary + sd if female == 1
gen down = salary - sd if female == 1
twoway 	(rarea up down year if female == 1, color(black%30) lp(dash) lcolor(white)) ///
		(connected salary year if female == 1, color(black) lp(dash)) ///
		(connected salary year if female == 0, color(gs8) lp(solid)) ///
		if year > 2007, legend(order(2 "women" 3 "men") pos(6)  row(1) region(lwidth(none))) $graph ///
		xtitle("year") ytitle("salary ($1,000)")  xlabel(2008(1)2016) xline(2012.5)  ylabel(48(2)58) xsize(6)
graph export "$out/salary_raw_year_ext2012.tif", replace height(600)
restore	

* Panel (d):Extension after 2012
* Mean salary by gender and calendar year for rows with a non-missing
* extension year later than 2012, with a band around the women's series.
preserve
drop femx_* fem_*
* Year dummies T1..Tk and their interactions with female
* (run quietly, as in the other panels, to keep the table out of the log)
qui tab year, gen(T)
forvalues t = 1/`r(r)' {
gen fem_`t' = female * T`t'
}
replace salary_n = salary_n / 1000
reg salary_n T1-T`r(r)' fem_* if extension > 2012 & extension != ., cluster(district_code)
local df = `e(df_r)'
* Band half-width: t critical value times the SE of that year's female interaction
gen sd = .
qui tab year
forvalues t = 1/`r(r)' {
replace sd = invttail(`df'-1,0.025)*(_se[fem_`t']) if T`t' == 1
}
* Restrict to the regression sample. Stata variable names are case sensitive,
* so this must be the same lower-case extension variable used in the regression.
keep if extension > 2012 & extension != .
collapse salary_n sd, by(female year)
gen up = salary + sd if female == 1
gen down = salary - sd if female == 1
twoway 	(rarea up down year if female == 1, color(black%30) lp(dash) lcolor(white)) ///
		(connected salary year if female == 1, color(black) lp(dash)) ///
		(connected salary year if female == 0, color(gs8) lp(solid)) ///
		if year > 2007, legend(order(2 "women" 3 "men") pos(6)  row(1) region(lwidth(none))) $graph ///
		xtitle("year") ytitle("salary ($1,000)")  xlabel(2008(1)2016) xline(2013.5)  ylabel(48(2)58)  xsize(6)
graph export "$out/salary_raw_year_extpost2012.tif", replace height(600)
restore	


*--> Figure 4a: Salaries of men and women
* Residualized log salary regressed on the event-time dummies, separately by gender.
preserve
	* Residuals of log salary after absorbing the controls
	qui reghdfe logsalary, a($exp i.extension##i.year) resid
	predict res, resid
	label var zero "0"

	* Same event-time regressors for both samples
	local evt "D_5-D_9 zero D_11-D_15"
	eststo fem:  reg res `evt' if female == 1, cluster(district_code)
	eststo male: reg res `evt' if female == 0, cluster(district_code)

	coefplot ///
		(fem, label("women") color(black) lw(thick) fcolor(none) ///
			recast(connect) ciopts(lcolor("$yo %30") recast(rcap))) ///
		(male, label("men") lcolor(gs8) lw(thick) lp(dash) fcolor(none) ///
			recast(connect) ciopts(lcolor("$ylb %30") recast(rcap))) ///
		, vert keep(D_5 D_6 D_7 D_8 D_9 zero D_11 D_12 D_13 D_14 D_15) omitted ///
		yline(0, lcolor(gs10) lw(vthin)) xline(6.5, lc(black)) ///
		level(90) ///
		ytitle("conditional salary (100*log)") ///
		xtitle("time to CBA expiration/extension") ///
		$graph legend(row(1) region(lwidth(none)) pos(6))
	graph export "$out/salaries_men_women_ext_repl.tif", replace height(600)
restore

* Re-apply the label on the restored data
label var zero "0"


*--> Figure 4b: Gender wage gap with Abraham-Sun graph included:
* Plots two series of estimates (FE and IW) with 95% intervals, read from
* weighted_estimates.csv. The main data are preserved first and restored at the end.

preserve
	* NOTE(review): reltime is dropped before the external do-file runs, presumably
	* because that file creates its own variable of this name - confirm.
	drop reltime
	* NOTE(review): presumably this do-file writes weighted_estimates.csv, read below - verify.
	do $do/QJE_R1/replication/AS_gap_repl_BB.do

	* Make graph
	* clear all empties memory, including any earlier definition of varprocess,
	* so the program can be defined again when the script is re-run.
	clear all
	* varprocess: index the rows by event time, rename the imported columns,
	* and build upper/lower interval bounds for each series.
	program varprocess
	{
		
		* Row number shifted by 6 is the event time; keep values up to 5
		g l = _n-6

		keep if l<=5
		
		* Point estimates
		rename b2way_hrs1 FE
		rename biw_hrs1 IW
		rename biw_hrs3 CATT_1
		rename biw_hrs5 CATT_2
		rename biw_hrs7 CATT_3
		rename biw_hrs9 CATT_4
		
		* Matching standard errors
		rename sd1 FE_sd
		rename sd2 IW_sd
		rename sd2011 CATT_1_sd
		rename sd2012 CATT_2_sd
		rename sd2013 CATT_3_sd
		rename sd2014 CATT_4_sd

		* Interval bounds: estimate plus/minus 1.96 standard errors
		foreach var in FE IW CATT_1 CATT_2 CATT_3 CATT_4 {
			g upperCI_`var' = `var' + `var'_sd*1.96
			g lowerCI_`var' = `var' - `var'_sd*1.96
		}
	}
	end

	* Read the estimates from the current working directory
	insheet using weighted_estimates.csv, clear 

	varprocess
	* Shift the two series slightly left/right so their markers do not overlap
	gen l_FE = l - 0.05
	gen l_IW = l + 0.05
													 	
	* Only the FE and IW series are drawn; the CATT columns are not plotted here
	twoway rcap upperCI_FE lowerCI_FE l_FE, lp(line) color(black) || scatter FE l_FE , c(l) mc(black) msize(medlarge) m(O) lw(thick) lc(black) ///
		|| rcap upperCI_IW lowerCI_IW l_IW, lp(line) color(gs8) || scatter IW l_IW , c(l) mc(gs8) msize(medlarge) lp(dash) lw(thick) m(T) lc(gs8) ///
		graphreg(fc(white)) ytitle("conditional salary (100*log)") xtitle("time to CBA expiration/extension") xla(-5(1)5) xline(0.5) ///
		legend(order(2 "OLS estimates" 4 "Sun & Abraham estimates") row(1) pos(6))
	graph export $out/TWFE_SA_estimates.tif, replace height(600)	 
restore


*--> Figure 5: Gap by experience and age

*-> by Experience
* Event-time gender gap estimated separately for totalexp <= 6 and totalexp > 20.
local evt  "femx_5-femx_9 zero femx_11-femx_15"
local spec "a($exp i.extension##i.year) vce(cluster district_code)"
eststo young: reghdfe logsalary `evt' female if totalexp <= 6, `spec'
eststo old:   reghdfe logsalary `evt' female if totalexp > 20, `spec'

coefplot ///
	(young, label("experience <= 6 (25th pctile)") lcolor(black) lw(thick) fcolor(none) mcolor(black) ///
		recast(connect) ciopts(lcolor("$yo %30") recast(rcap))) ///
	(old, label("experience > 20 (75th pctile)") lcolor(gs8) lp(dash) lw(thick) fcolor(none) mcolor(gs8) ///
		recast(connect) ciopts(lcolor("$ylb %30") recast(rcap))) ///
	, vert keep(femx_5 femx_6 femx_7 femx_8 femx_9 zero femx_11 femx_12 femx_13 femx_14 femx_15) omitted ///
	yline(0, lcolor(gs10) lw(vthin)) xline(6.5) ///
	level(90) ///
	ytitle("conditional salary (100*log)") ///
	xtitle("time to CBA expiration/extension") ///
	$graph legend(row(1) region(lwidth(none)) pos(6))
graph export "$out/gap_byexp_ext_repl.tif", replace height(600)


*-> by Age
* Event-time gender gap estimated separately for age <= 32 and age > 50.
* Age is year minus birth, set to missing outside 22-80.
gen age = year - birth
replace age = . if age < 22 | age > 80

local evt  "femx_5-femx_9 zero femx_11-femx_15"
local spec "a($exp i.extension##i.year) vce(cluster district_code)"
eststo young: reghdfe logsalary `evt' female if age <= 32, `spec'
eststo old:   reghdfe logsalary `evt' female if age > 50, `spec'

coefplot ///
	(young, label("age <= 32 (25th pctile)") lcolor(black) lw(thick) fcolor(none) mcolor(black) ///
		recast(connect) ciopts(lcolor("$yo %30") recast(rcap))) ///
	(old, label("age > 50 (75th pctile)") lcolor(gs8) lw(thick) lp(dash) fcolor(none) mcolor(gs8) ///
		recast(connect) ciopts(lcolor("$ylb %30") recast(rcap))) ///
	, vert keep(femx_5 femx_6 femx_7 femx_8 femx_9 zero femx_10 femx_11 femx_12 femx_13 femx_14 femx_15) omitted ///
	yline(0, lcolor(gs10) lw(vthin)) xline(6.5) ///
	level(90) ///
	ytitle("conditional salary (100*log)") ///
	xtitle("time to CBA expiration/extension") ///
	$graph legend(row(1) region(lwidth(none)) pos(6))
graph export "$out/gap_byage_ext_repl.tif", replace height(600)


	 
*--> Figure 6A: Gap, by mobility
* Event-time gender gap for three samples: ids with no move, ids with any move,
* and ids whose latest move year is after their extension year.
label var zero "0"

* mover_year = latest year in which the id has mover_d == 1 (missing if never)
tempvar move_yr
gen `move_yr' = year if mover_d == 1
bysort id: egen mover_year = max(`move_yr')

local evt  "femx_5-femx_9 zero femx_11-femx_15"
local spec "a($exp i.extension##i.year) vce(cluster district_code)"
eststo nonmovers:   reghdfe logsalary `evt' female if timex > -7 & mover_year == ., `spec'
eststo movers:      reghdfe logsalary `evt' female if timex > -7 & mover_year != ., `spec'
eststo movers_post: reghdfe logsalary `evt' female if timex > -7 & mover_year !=. & mover_year > extension, `spec'

coefplot ///
	(nonmovers, label("non-movers") lcolor(black) lw(thick) fcolor(none) mcolor(black) ///
		recast(connect) ciopts(lcolor(black %30) recast(rcap))) ///
	(movers, label("movers (ever)") lcolor(gs8) lw(thick) fcolor(none) mcolor(gs8) ///
		recast(connect) ciopts(lcolor(gs8 %30) recast(rcap))) ///
	(movers_post, label("movers (post-extension)") lcolor(gs12) lw(thick) fcolor(none) mcolor(gs12) ///
		recast(connect) ciopts(lcolor(gs12 %30) recast(rcap))) ///
	, vert keep(femx_5 femx_6 femx_7 femx_8 femx_9 zero femx_10 femx_11 femx_12 femx_13 femx_14 femx_15) omitted ///
	yline(0, lcolor(gs10) lw(vthin)) xline(6.5) ///
	level(90) ///
	ytitle("conditional salary (100*log), women vs men") ///
	xtitle("time to CBA expiration/extension") ///
	$graph legend(region(lwidth(none)) pos(6) row(1))
graph export "$out/gap_bymob_repl.tif", replace height(600)


*--> Figure 6B: Gap, by nr schools in CZ
* Event-time gender gap for rows in the top and bottom quartile of pre_cz_schools.

cap drop many_schools
* Quartile cut-offs, saved right away so later commands cannot overwrite them
sum pre_cz_schools, d
local p25 = r(p25)
local p75 = r(p75)

* many_schools = 1 at or above the 75th percentile, 0 at or below the 25th,
* and missing in between or when pre_cz_schools is missing. This matches the
* legend labels below.
* NOTE(review): the top group used to be cut at the 25th percentile as well, so it
* contained every row above p25. Confirm the p75 cut-off against the paper.
g many_schools = 1 if pre_cz_schools >= `p75' & !missing(pre_cz_schools)
replace many_schools = 0 if pre_cz_schools <= `p25'

eststo baseline_ext_pp: reghdfe logsalary femx_5-femx_9 zero femx_11-femx_15 female if timex > -7 & many_schools==1, a($exp i.extension##i.year) vce(cluster district_code)
eststo baseline_ext_ss: reghdfe logsalary femx_5-femx_9 zero femx_11-femx_15 female if timex > -7 & many_schools==0, a($exp i.extension##i.year) vce(cluster district_code)
coefplot	(baseline_ext_pp, label("> 75th pctile") lcolor(black) lw(thick) fcolor(none) mcolor(black) recast(connect) ciopts(lcolor(black % 30) recast(rcap)))   ///
			(baseline_ext_ss, label("< 25th pctile") lcolor(gs8) lp(dash) lw(thick) fcolor(none) mcolor(gs8) recast(connect) ciopts(lcolor(gs8 % 30) recast(rcap)))  ///
			 , vert keep(femx_5 femx_6 femx_7 femx_8 femx_9 zero femx_10 femx_11 femx_12 femx_13 femx_14 femx_15) omitted yline(0, lcolor(gs10) lw(vthin)) xline(6.5) ///
			 level(90) ytitle("conditional salary (100*log)") xtitle("time to CBA expiration/extension") legend(region(lwidth(none)) pos(6) row(1) title("nr of schools in CZ:")) $graph
graph export "$out/gap_bynum_schools_repl.tif", replace height(600)

